## Replication code for "City Limits to Partisan Polarization" by Amalie Jensen,
## William Marble, Kenneth Scheve, and Matthew Slaughter

# This file analyzes the partisan hierarchical model results. It outputs second-level
# regression tables (in the Appendix of the paper) as well the partisan CAMCE
# plots (Figure 5). 

# Relies on having estimated a model with the following covariates (colnames)
# 7-point Party ID scale (named pid7_2)
# Age buckets (age3150 = 31 to 50, etc.)
# Race indicators (black, latino, other)
# Sex indicator (female)
# Education indicator (college)
# Income quartile within MSA (inc_quartile)
# Indicator for employment status (looking_for_work)
# Homeownership (homeowner)
# Number of years living in MSA (years_in_msa2)
# MSA indicators (msa)

library(rstan)
library(dplyr)
library(ggpubr)
library(ggplot2)
library(ggthemes)
library(ggridges)
library(stringr)
library(parallel)
if(!require(wpmarble)){
  devtools::install_github("wpmarble/wpmarble")
  library(wpmarble)
}

theme_set(theme_bw() + theme(panel.spacing = unit(0, "in")))


source("Code/hlm/functions/stan_utility.R")
source("Code/hlm/functions/summarize_results.R")

select <- dplyr::select
extract <- rstan::extract

set.seed(15419) # from random.org





# Load model results ------------------------------------------------------



load("Data/demos_socioecon_pid7_full_sample.rdata")

# extract model parameters (but not all of them, for memory reasons...)
parnames = sampout@model_pars
keep = parnames[grep("gamma", parnames)]
keep = keep[!grepl("tilde", keep)]
params = rstan::extract(sampout, par = keep)
rm(sampout); gc()
params %>% object.size

# get individual covariate matrix from standat object. this is the Z matrix in
# the notation of the paper.
ind_covariates = standat$ind_covariates
rm(standat); gc()

# store conjoint levels
educ_levels    = levels(ind_dat$educ)
hieduc_levels  = levels(ind_dat$hieduc)
gov_levels     = levels(ind_dat$gov)
invest_levels  = levels(ind_dat$invest)
workers_levels = levels(ind_dat$workers)
local_levels   = levels(ind_dat$local)

educ_levels    = educ_levels[!(educ_levels %in% c("skipped", "not asked")) & !grepl("^Keep current", educ_levels)]
hieduc_levels  = hieduc_levels[!(hieduc_levels %in% c("skipped", "not asked")) & !grepl("^Keep current", hieduc_levels)]
gov_levels     = gov_levels[!(gov_levels %in% c("skipped", "not asked")) & !grepl("^Keep current", gov_levels)]
invest_levels  = invest_levels[!(invest_levels %in% c("skipped", "not asked")) & !grepl("^Keep current", invest_levels)]
workers_levels = workers_levels[!(workers_levels %in% c("skipped", "not asked")) & !grepl("^Keep policies", workers_levels)]
local_levels   = local_levels[!(local_levels %in% c("skipped", "not asked")) & !grepl("^Keep current", local_levels)]


# add line breaks to the levels, which are useful for making tables. 
educ_lev_labs    = add_linebreaks(educ_levels)
hieduc_lev_labs  = add_linebreaks(hieduc_levels)
gov_lev_labs     = add_linebreaks(gov_levels)
invest_lev_labs  = add_linebreaks(invest_levels)
workers_lev_labs = add_linebreaks(workers_levels)
local_lev_labs   = add_linebreaks(local_levels)

# shorter level labels
educ_lev_labs2 = c("Charter\\nSchools", "School\\nVouchers", "Free\\nPre-School", "Pay Teachers\\nMore")
hieduc_lev_labs2 = c("Community\\nCollege","Local Public\\nUniversities", "Technical\\nTraining", "Student grant\\nPrograms")
gov_lev_labs2 = c("Consolidate\\nLocal Gov't", "More Power\\nto the State")
invest_lev_labs2 = c("Attract\\nNew Businesses", "Stimulate\\nExisting Companies", "Encourage Investment\\nBy Charities")
workers_lev_labs2 = c("Limit Unions'\\nPower", "Expand Unions'\\nPower", "Worker\\nTraining Vouchers", "Tax Breaks\\nto Entrepreneurs")
local_lev_labs2 = c("Affordable\\nHousing", "Public\\nTransportation", "Safety and\\nCrime Prevention")

# Covariate names
covs = colnames(ind_covariates)
print(covs)
covs =c("Intercept", 
        "Party: Weak Dem.", 
        "Party: Lean Dem.",
        "Party: Independent",
        "Party: Lean Rep.",
        "Party: Weak Rep.",
        "Party: Strong Rep.",
        "Age: 31-50", "Age: 51-65", "Age: 65+", 
        "Race: Black", "Race: Latino", "Race: Other",
        "Female", 
        "College", 
        "Income: Second Quartile", "Income: Third Quartile", "Income: Fourth Quartile", 
        "Looking for Work", 
        "Homeowner", 
        "Years in MSA: 1-5", "Years in MSA: 6-10", "Years in MSA: 11-15", 
        "Years in MSA: 16+",
        "MSA: Cleveland", "MSA: Houston", "MSA: Indianapolis",
        "MSA: Memphis", "MSA: Rochester", "MSA: St. Louis", 
        "MSA: Seattle")
covs2 = covs[-1]
covs2 = c(covs2, "Intercept")


# save params + levels (smaller than the full stan output file loaded above)
tosave = list(params = params, 
              levels = list(levs = list(educ_levels = educ_levels,
                                        hieduc_levels = hieduc_levels,
                                        gov_levels = gov_levels,
                                        invest_levels = invest_levels,
                                        workers_levels = workers_levels,
                                        local_levels = local_levels),
                            plot_labs = list(
                              educ_lev_labs = educ_lev_labs,
                              hieduc_lev_labs = hieduc_lev_labs,
                              gov_lev_labs = gov_lev_labs,
                              invest_lev_labs = invest_lev_labs,
                              workers_lev_labs = workers_lev_labs,
                              local_lev_labs = local_lev_labs
                            )),
              covariates = covs2, 
              ind_covariates = ind_covariates)
saveRDS(tosave, file = "Data/params_gamma_socioecon_pid7.rds")

to_rm = ls()[grepl("_sum$", ls())]
rm(list=to_rm); gc()



# Make second-level regression tables -------------------------------------

# These are Tables A-7 through A-12 in Appendix F.
# Uses some custom functions defined in Code/hlm/function/summarize_results.R

# Extract summaries of the gammas
gamma_educ_sum    = extract_gamma(params, "gamma_educ", level_names = educ_levels, cov_names = covs)
gamma_hieduc_sum  = extract_gamma(params, "gamma_hieduc", level_names = hieduc_levels, cov_names = covs)
gamma_gov_sum     = extract_gamma(params, "gamma_gov", level_names = gov_levels, cov_names = covs)
gamma_invest_sum  = extract_gamma(params, "gamma_invest", level_names = invest_levels, cov_names = covs)
gamma_workers_sum = extract_gamma(params, "gamma_workers", level_names = workers_levels, cov_names = covs)
gamma_local_sum   = extract_gamma(params, "gamma_local", level_names = local_levels, cov_names = covs)
gamma_int_sum     = extract_gamma(params, "gamma_intercept", level_names = "Intercept", cov_names = covs)

# print regression tables for the gammas
make_table(gamma_educ_sum, cov_order = covs2, level_order = educ_levels, level_labels = educ_lev_labs2, digits = 3, ci = FALSE, aster = TRUE, omit = "^MSA*",checks = "MSA",          out.file = "Output/Tables/demos_pid7_educ.tex")
make_table(gamma_hieduc_sum, cov_order = covs2, level_order = hieduc_levels, level_labels = hieduc_lev_labs2, digits = 3, ci = FALSE, aster = TRUE, omit = "^MSA*", checks = "MSA",   out.file = "Output/Tables/demos_pid7_hieduc.tex")
make_table(gamma_gov_sum, cov_order = covs2, level_order = gov_levels, level_labels = gov_lev_labs2, digits = 3, ci = FALSE, aster = TRUE, omit = "^MSA*", checks = "MSA",            out.file = "Output/Tables/demos_pid7_gov.tex")
make_table(gamma_invest_sum, cov_order = covs2, level_order = invest_levels, level_labels = invest_lev_labs2, digits = 3, ci = FALSE, aster = TRUE, omit = "^MSA*", checks = "MSA",   out.file = "Output/Tables/demos_pid7_invest.tex")
make_table(gamma_workers_sum, cov_order = covs2, level_order = workers_levels, level_labels = workers_lev_labs2, digits = 3, ci = FALSE, aster = TRUE, omit = "^MSA*", checks = "MSA",out.file = "Output/Tables/demos_pid7_workers.tex")
make_table(gamma_local_sum, cov_order = covs2, level_order = local_levels, level_labels = local_lev_labs2, digits = 3, ci = FALSE, aster = TRUE, omit = "^MSA*", checks = "MSA",      out.file = "Output/Tables/demos_pid7_local.tex")








# Run party summary function ----------------------------------------------


# Summarize party results using the summarisePartyResults() function, which gives 
# density estimates and posterior means for predicted AMCE's, setting the
# party levels to Strong D or Strong R while leaving other covariates unchanged.
# Also reports the posterior probability that the means of the predicted 
# distributions have different signs. 

# This is the analysis presented in Figure 5 (plot made in next section).



# Create "counterfactual" matrices that leave all covariates as is, except
# setting party to strong D or strong R.
dem <- rep <- ind_covariates
dem[, c("pid7_2Weak Dem.", "pid7_2Lean Dem.", "pid7_2Independent", "pid7_2Lean Rep.", 
        "pid7_2Weak Rep.", "pid7_2Strong Rep.")] = 0
rep[, c("pid7_2Weak Dem.", "pid7_2Lean Dem.", "pid7_2Independent", "pid7_2Lean Rep.", 
        "pid7_2Weak Rep.")] = 0
rep[, "pid7_2Strong Rep."] = 1



# Do it in parallel b/c it's computationally intensive (due to high-dimensional
# matrix multiplication and repeatedly re-estimating kernel densities).
cl = makeCluster(4)
clusterExport(cl, c("dem", "rep", "params", "summarisePartyResults"))
facts = c("educ", "hieduc", "invest", "workers", "gov", "local")
res = parLapply(cl, facts, function(x) summarisePartyResults(params, factorname = x, dem = dem, rep = rep, levs = NULL))
stopCluster(cl)

# extract densities and means from res
den = lapply(res, function(x) x$density)
party_means = lapply(res, function(x) x$mean)










# Recode levels and factors for plotting ----------------------------------


# level recodes
educ_recode = "1 = 'Charter schools'; 2 = 'Vouchers to schools'; 3 = 'Free pre-school'; 4 = 'Pay teachers more'"
hieduc_recode = "1 = 'Community colleges'; 2 = 'Local public universities'; 3 = 'Technical vocational training'; 4 = 'Student grant programs'"
invest_recode = "1 = 'Attract new businesses'; 2 = 'Stimulate existing companies'; 3 = 'Encourage investment by charities'"
gov_recode = "1 = 'Consolidate local government'; 2 = 'More power to the state'"
workers_recode = "1 = 'Limit unions\\' power'; 2 = 'Expand unions\\' power'; 3 = 'Worker training vouchers'; 4 = 'Tax breaks to entrepreneurs'"
local_recode =  "1 = 'Affordable housing'; 2 = 'Public transportation'; 3 = 'Public safety and crime prevention'"

for (i in 1:length(den)) { 
  f = den[[i]]$factor[1]
  rs = get(paste0(f, "_recode"))
  den[[i]]$level_lab = car::recode(den[[i]]$level, rs)
  party_means[[i]]$level_lab = car::recode(party_means[[i]]$level, rs) 
}
den = bind_rows(den)
party_means = bind_rows(party_means)


# factor recodes
frec = "'educ' = 'Education'; 'hieduc' = 'Higher Education'; 'gov' = 'Governance'; 'invest' = 'Investment & Taxes';  'local' = 'Local Services'; 'workers' = 'Workers & Entrepreneurs'"
den$factor = car::recode(den$factor, frec)
party_means$factor = car::recode(party_means$factor, frec)

# reorder factors
den$factor = factor(den$factor, c("Investment & Taxes", "Workers & Entrepreneurs",
                                  "Local Services", "Governance", "Education", "Higher Education"))
party_means$factor = factor(party_means$factor, c("Investment & Taxes", "Workers & Entrepreneurs",
                                  "Local Services", "Governance", "Education", "Higher Education"))








# Make plot ---------------------------------------------------------------


# Subset density for plotting so we don't have a bunch of lines all close to y=0
den_to_plot = filter(den, dens_mean >= quantile(dens_mean, .025), x <= .25)

# include a label in party_means for Pr(different signs)
party_means = party_means %>% 
  mutate(star = paste0(round(party_p_val*100, 1), "%"))

ggplot() + 
  geom_vline(xintercept = 0, lty = 3) + 
  geom_ridgeline(data = den_to_plot,
                 aes(x = x, y = level_lab, height = dens_mean, colour = party), 
                 fill = "transparent", scale = .04, min_height = .01) + 
  geom_point(data = party_means, aes(x = dem_mean, y = level_lab, colour = "Strong Democrat")) + 
  geom_point(data = party_means, aes(x = rep_mean, y = level_lab, colour = "Strong Republican")) + 
  geom_text(data = party_means, aes(y = level_lab, label = star), x = .24, hjust = 0, size = 3) + 
  labs(x = "Predicted MCE", y = NULL) + 
  facet_wrap(~factor, ncol = 1, drop = TRUE, scales = "free_y") + 
  scale_colour_manual(values = c("blue", "red"), name = NULL) + 
  scale_x_continuous(breaks = seq(-.2, .2, .1)) + 
  theme(legend.position = "bottom", panel.spacing = unit(0, "in")) + 
  coord_cartesian(xlim = c(-.25, .289))
ggsave("Output/fig5.pdf", height=8.5, width=5)
ggsave("Output/fig5.eps", height=8.5, width=5)


saveRDS(party_means, file = "Data/summarize_means.rds")
saveRDS(den, file = "Data/density_ests.rds")

